# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from datetime import datetime
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from tjgpc.rules import parse_sku, parse_total_page
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that walks the paginated SKU search listing.

    Flow: ``start_requests`` fetches page 1, ``parse_total_page`` reads the
    total page count from that response and schedules the remaining pages,
    and ``parse_sku`` turns each listing page into a ``Box`` of SKU records.
    """

    name = 'sku'
    # Listing endpoint; ``{}`` is filled with the page number (requests start at 1).
    sku_list_url = 'http://111.164.113.185:8090/godTotalGoods/web_searchGoods.do?page={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any extra spider arguments to ``BaseSpider``.

        NOTE(review): ``self.batch_no`` used below is presumably set by
        ``BaseSpider.__init__`` from ``batchNo`` -- confirm against the base class.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def _page_request(self, page, callback):
        """Build the POST request for one listing page.

        ``dont_filter=True`` keeps the duplicate filter from dropping the
        request; the page number travels in ``meta`` so callbacks can log it.
        NOTE(review): ``self.error_back`` is not defined in this class; it is
        presumably inherited from ``BaseSpider``.
        """
        return Request(
            method='POST',
            url=self.sku_list_url.format(page),
            callback=callback,
            errback=self.error_back,
            meta={
                'batchNo': self.batch_no,
                'page': page
            },
            dont_filter=True
        )

    def start_requests(self):
        """Request the first listing page to discover the total page count."""
        yield self._page_request(1, self.parse_total_page)

    def parse_total_page(self, response):
        """Read the total page count and crawl every listing page.

        The response passed in is already listing page 1, so its SKUs are
        parsed directly instead of requesting the same URL a second time;
        only pages 2..N are scheduled as new requests.
        """
        pages = parse_total_page(response)
        if not pages:
            # Without a page count nothing can be scheduled; make that visible
            # in the log instead of ending the crawl silently.
            self.logger.warning('总页数为空: url=%s', response.url)
            return

        self.logger.info('总页数: pages=%s', pages)

        # Page 1 is in hand (its meta carries page=1): emit its SKUs now.
        for item in self.parse_sku(response):
            yield item

        # Schedule the remaining listing pages.
        for page in range(2, pages + 1):
            yield self._page_request(page, self.parse_sku)

    def parse_sku(self, response):
        """Yield one ``Box`` holding the SKUs parsed from a listing page.

        An empty page yields nothing and is only logged.
        """
        cur_page = response.meta.get('page', 1)
        sku_list = parse_sku(response)
        if sku_list:
            self.logger.info('清单: page=%s, cnt=%s', cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
        else:
            self.logger.info('分页为空: page=%s', cur_page)
